#delimit ;
clear all;

* Import data;
* Each raw CSV export of the spectator task is read and parked in its own tempfile;
* The date stamp below selects which dated export files are read;
  local date "2021-11-03";
  tempfile wide rounds exit_q instruct page_times demog;

  import delimited "${raw}\spectator_task\all_apps_wide_`date'.csv", clear;
  save `wide', replace;

  import delimited "${raw}\spectator_task\rounds_`date'_cf_implemented.csv", clear;
  save `rounds', replace;

  import delimited "${raw}\spectator_task\exit_`date'.csv", clear;
  save `exit_q', replace;

  import delimited "${raw}\spectator_task\instructions_`date'.csv", clear;
  save `instruct', replace;

  import delimited "${raw}\spectator_task\PageTimes-`date'.csv", clear;
  save `page_times', replace;

  * The demographics file carries no date stamp in its name;
  import delimited "${raw}\spectator_task\experiment_ids_with_demographics.csv", clear;
  save `demog', replace;

* Clean screen times;
* Turns the page-level timing log into one row per participant holding the;
* total time per page type, round-level timing summaries and a completion stamp;
use `page_times', clear;

* Harmonise page names so that related pages are pooled under a single label;
replace page_name = lower(page_name);
* The variable must be the first argument of inlist, otherwise two literals;
* are compared with each other and the condition can never be true;
replace page_name = "init_consent"  if inlist(page_name, "initializeparticipant", "consent");
replace page_name = "decision"      if page_name == "break" & app_name == "rounds";
replace page_name = "comprehension" if regex(page_name,"comprehension");

* Time on a row is the change in epoch_time since the previous row of the same;
* participant, so the first row of every participant is missing;
bys participant_code (epoch_time): gen time_elapsed = epoch_time[_n] - epoch_time[_n-1];

* Rows with page_index 0 or 1 are removed only after the differences are taken;
drop if inlist(page_index,0,1);

* Participant-level summaries over the decision pages only;
by participant_code: egen avg_time_round = mean(time_elapsed)   if page_name == "decision";
by participant_code: egen p50_time_round = median(time_elapsed) if page_name == "decision";
by participant_code: egen nr_rounds      = max(round_number)    if page_name == "decision";

* One total-time variable per page name, capped from above at the 95th percentile;
* of that variable as computed over all remaining rows;
levelsof page_name, local(pages);
foreach p of local pages {;
	by participant_code: egen time_`p' = total(time_elapsed) if page_name == "`p'";
	qui sum time_`p', d;
	replace time_`p' = r(p95) if time_`p' > r(p95) & time_`p' != .;
};

* NOTE(review): page_index 37 is presumably the last page of the study, confirm;
gen time_completed = epoch_time if page_index == 37;

* Keep the first non-missing value of every timing variable per participant;
collapse (firstnm) time_* nr_rounds *time_round, by(participant_code);

* rowtotal is the documented name of the legacy rsum function and, like rsum,;
* treats missing components as zero;
egen time_total = rowtotal(time_aboutworkers time_beliefs time_comprehension time_decision time_effort time_yourtask);

* Match the key name used by the other exports and drop the row-level helper;
rename participant_code participantcode;
drop time_elapsed;
save `page_times', replace;

* Merge datasets;
* Start from the instructions export, attach page times and the exit questionnaire;
* one-to-one, then the round-level decisions one-to-many on participantcode, and;
* finally the demographics many-to-one on userid;
* The use command accepts clear, not replace, to discard the data in memory;
use `instruct', clear;
rename participantlabel userid;
merge 1:1 participantcode using `page_times', nogen;
merge 1:1 participantcode using `exit_q'    , nogen;
* NOTE(review): presumably dropped so that the round counter from the rounds file;
* is the one that survives the next merge, confirm;
drop subsessionround_number;
merge 1:m participantcode using `rounds'    , nogen;
merge m:1 userid          using `demog'     , nogen;

* Add worker effort;
* For worker slots 1 and 2 in turn, look up tasks_comp in worker.dta by id and;
* store it as worker1_tasks or worker2_tasks. Rows found only in worker.dta are;
* discarded and the temporary id key is removed again after each lookup;
forvalues w = 1/2 {;
	rename playerw_id`w' id;
	merge m:1 id using "${data}\worker.dta", keepusing(tasks_comp);
	drop if _merge == 2;
	drop _merge id;
	rename tasks_comp worker`w'_tasks;
};

* Flag one row per participant and tabulate progress through the pages by condition;
bysort participantcode: gen one = (_n == 1);
tabulate participant_index_in_pages playercondition if one;

* Strip the player prefix from variable names, then rename the two key variables;
rename player* *;
rename (participantcode rule) (id rules);

* Keep only the ones who finished;
keep if participant_index_in_pages == 38;

* Drop test links;
drop if inlist(userid, 70000000, 70000001);

* Due to the staggered rollout of our recruitment, there were a few instances in which the same set of redistribution decisions were answered by two different spectators.
* In these instances, we dropped the second set of responses from our analysis sample;
* N counts the rows per trial_id and n numbers them in order of completion time,;
* so rows 13 to 24 within a trial_id are the later set of answers;
bysort trial_id: gen N = _N;
tabulate N;
bysort trial_id (time_completed): gen n = _n;
drop if n >= 13 & n <= 24;
drop n N time_completed;

* Strip the percent sign from pi and q and convert both to numeric;
foreach v in pi q {;
	replace `v' = subinstr(`v', "%", "", .);
	destring `v', replace;
};
* pi is blanked for the CF condition here and recomputed from q further below;
replace pi = . if condition == "CF";

* Halve the raw redistribution amount and express it as a share of 5;
replace redistribute = redistribute/2;
gen shr_red = redistribute/5;

gen player1_won = (outcome1 == "won");

* Multiplier ratio and difference, always taken as winner relative to loser;
gen     ratio_m = multiplier1/multiplier2 if player1_won == 1;
replace ratio_m = multiplier2/multiplier1 if player1_won == 0;

gen     diff_m = multiplier1 - multiplier2 if player1_won == 1;
replace diff_m = multiplier2 - multiplier1 if player1_won == 0;

* Indicator that the winner had the strictly lower multiplier, missing under CF;
gen     winner_had_low_m = 0;
replace winner_had_low_m = 1 if (player1_won == 1 & multiplier1 < multiplier2);
replace winner_had_low_m = 1 if (player1_won == 0 & multiplier1 > multiplier2);
replace winner_had_low_m = . if condition == "CF";

* Condition dummies based on the text of the condition variable;
gen ea = (regex(condition, "EA"));
gen ep = (regex(condition, "EP"));
gen cf = (regex(condition, "CF"));

* Attention;
gen one_attempt = (q1_att == 1 & q2_att == 1 & q3_att == 1 & q4_att == 1);
gen passed_att = (attention_check == 4);
* Missing values compare as larger than any number in Stata, so both indicators;
* are restricted to non-missing inputs to avoid coding a missing value as 1;
gen more_to_loser = (redistribute > 2.5) if !missing(redistribute);
bys id: egen shr_moreloser = mean(more_to_loser);
gen moreloser1r = (shr_moreloser > 0) if !missing(shr_moreloser);


* Final earnings;
* The winner keeps 5 minus the redistributed amount and the loser receives it;
gen     fin_earnings1 = .;
replace fin_earnings1 = 5 - redistribute if player1_won == 1;
replace fin_earnings1 = 0 + redistribute if player1_won == 0;
gen     fin_earnings2 = 5 - fin_earnings1;
format  fin_earnings* redistribute %10.3g;

* Absolute earnings gap divided by total earnings of the two workers;
gen gini = abs(fin_earnings1-fin_earnings2)/(fin_earnings1+fin_earnings2);

* Recompute pi from q for the CF condition, then bin pi into 12 groups;
replace pi = q/2 + (100-q)*1 if condition == "CF";
* NOTE(review): the recode ranges have integer endpoints, so a half-integer pi;
* such as 54.5, which arises under CF when q is odd, matches no rule and is;
* copied into pi_bin unchanged. Confirm that q only takes even values;
recode pi (50 = 1) (51/54 = 2) (55/59 = 3) (60/64 = 4) (65/69 = 5) (70/74 = 6) (75/79 = 7) (80/84 = 8) (85/89 = 9) (90/94 = 10) (95/99 = 11) (100 = 12), gen(pi_bin);

rename subsessionround_number round_nr;

* Clean demographics;
* Attach value labels to the coded demographic variables, shorten variable names,;
* recode hispanic to a 0/1 indicator and drop demographics that are not needed;
label define education
	1 "Less than high school"
	2 "High school diploma"
	3 "Some college but no degree"
	4 "Associate/Junior College degree"
	5 "Bachelor's Degree"
	6 "Master's Degree"
	7 "Doctoral Degree"
	8 "Professional Degree";
label values education education;

label define race
	1 "White"
	2 "Black or African American"
	3 "American Indian or Alaska Native"
	4 "Asian"
	5 "Native Hawaiian or Other Pacific Islander"
	6 "Other";
label values race race;

label define own_rent
	1 "Own"
	2 "Rent"
	3 "Other";
label values own_rent own_rent;

* Every dollar sign in the income labels is escaped with a backslash. Left bare,;
* a dollar sign followed by digits is read as a global macro reference and;
* expands to nothing, which would strip the amounts from the labels;
label define hh_income
	1  "Less than \$10,000 "
	2  "\$10,000 to \$19,999"
	3  "\$20,000 to \$29,999"
	4  "\$30,000 to \$39,999"
	5  "\$40,000 to \$49,999"
	6  "\$50,000 to \$59,999"
	7  "\$60,000 to \$74,999"
	8  "\$75,000 to \$99,999"
	9  "\$100,000 to \$149,999"
	10 "\$150,000 to \$199,999"
	11 "\$200,000 or more";
label values hh_income hh_income;

* Remove the current_ and residence_ prefixes from variable names;
rename current_* *;
rename residence_* *;
* Strip the region_ prefix from the values of region;
replace region = subinstr(region, "region_", "", .);
* Code 2 becomes 0, all other values of hispanic are left as they are;
recode hispanic (2=0);

drop gender amindian asian hawaiian_pi other_race zipcode own_other_homes children_over18 children_over25 children_18_24 children_6_17 children_under6 county;


#delimit ;
* Final variable selection. One list drives both keep and order so that the;
* retained variables and their column order cannot drift apart;
local final_vars id condition rules pi pi_bin q mult* player1_won atten ratio diff_m shr_red redistribute gini time_* nr_rounds *time_round info round_nr luck-avg_eff rules multip* winner_had_low_m mobile random_m percent_chance fair_share_work q*_* ea* ep* cf* is_rand_round one_attempt passed_att more* state-numeracy hh_income worker*_tasks;
keep  `final_vars';
order `final_vars';

* Blank out every variable label and shrink storage types before saving;
foreach v of varlist _all {;
	label variable `v' "";
};
compress;

save "${data}\spectator.dta", replace;
